package mt;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.util.regex.Pattern;

import main.U;


/**
 * Base class for machine-translation back ends, bundled with static helpers that
 * prepare translation output for scoring.
 *
 * <p>Subclasses implement {@link #translate(String)}. The class name and the
 * {@code srclang="Chinese"} / {@code trglang="English"} attributes written by
 * {@link #toTst(String, String)} indicate the intended direction is Chinese to
 * English.
 */
public abstract class C2EMT {

	/** Punctuation marks that {@link #segment(String)} isolates as separate tokens. */
	private static final Pattern PUNCTUATION = Pattern.compile("[,.:?!;()\"]");

	/** One or more consecutive space characters. */
	private static final Pattern SPACE_RUN = Pattern.compile(" +");

	/**
	 * Translates a single query.
	 *
	 * @param query the text to translate
	 * @return the translation of {@code query}
	 * @throws Exception if the implementation fails to translate
	 */
	public abstract String translate(String query) throws Exception;

	/**
	 * Ad-hoc smoke test: prints the segmented form of a fixed sample string.
	 *
	 * @param args ignored
	 * @throws Exception never thrown by the current body; kept for interface compatibility
	 */
	public static void main(String[] args) throws Exception {
		System.out.println(segment("asd  asdf  sdaf  "));
	}

	/**
	 * Placeholder for evaluating a translation result file; currently does nothing.
	 *
	 * @param filePath the translation result file, one sentence per line
	 * @throws Exception never thrown by the current (empty) body
	 */
	public static void evaluate(String filePath) throws Exception {
		// Not implemented yet.
	}

	/**
	 * Converts a plain-text file with one sentence per line into a {@code <tstset>}
	 * document, writing one {@code <seg>} element per input line.
	 *
	 * <p>Segments are numbered from 1 in input order and each line is passed through
	 * {@link #segment(String)} before being written. Both streams are closed even
	 * when reading or writing fails.
	 *
	 * @param in  path of the input file (opened through {@code U.newUtf8Reader})
	 * @param out path of the output file (opened through {@code U.newUtf8writer})
	 * @throws Exception if opening, reading, writing or closing either file fails
	 */
	public static void toTst(String in, String out) throws Exception {
		BufferedReader reader = U.newUtf8Reader(in);
		try {
			BufferedWriter writer = U.newUtf8writer(out);
			try {
				writer.write("<tstset setid=\"test\" srclang=\"Chinese\" trglang=\"English\">\n");
				writer.write("<DOC docid=\"news\" sysid=\"MT\">\n");

				int id = 0;
				String line;
				while ((line = reader.readLine()) != null) {
					id++;
					// NOTE(review): the segment text is written without escaping '&', '<'
					// or '>'; confirm whether the downstream scorer expects raw text.
					writer.write("<seg id=\"" + id + "\"> " + segment(line) + " </seg>\n");
				}

				writer.write("</DOC>\n");
				writer.write("</tstset>\n");
			} finally {
				// Writer first (as in the original close order), then the reader below.
				writer.close();
			}
		} finally {
			reader.close();
		}
	}

	/**
	 * Tokenizes punctuation by surrounding each of {@code , . : ? ! ; ( ) "} with
	 * spaces, then collapses runs of spaces into one and trims the result.
	 *
	 * <p>Every mark is preserved as itself; in particular a semicolon stays a
	 * semicolon rather than being rewritten as a colon.
	 *
	 * @param s the text to tokenize; must not be {@code null}
	 * @return {@code s} with the listed punctuation marks separated by single spaces
	 * @throws NullPointerException if {@code s} is {@code null}
	 */
	public static String segment(String s) {
		// "$0" re-inserts the matched punctuation mark between the padding spaces.
		String padded = PUNCTUATION.matcher(s).replaceAll(" $0 ");
		return SPACE_RUN.matcher(padded).replaceAll(" ").trim();
	}
}